# load necessary libraries
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)

#load data relig_income present in tidyr package 
data(relig_income)
head(relig_income)
## # A tibble: 6 × 11
##   religion  `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
##   <chr>       <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>
## 1 Agnostic       27        34        60        81        76       137        122
## 2 Atheist        12        27        37        52        35        70         73
## 3 Buddhist       27        21        30        34        33        58         62
## 4 Catholic      418       617       732       670       638      1116        949
## 5 Don’t kn…      15        14        15        11        10        35         21
## 6 Evangeli…     575       869      1064       982       881      1486        949
## # ℹ 3 more variables: `$100-150k` <dbl>, `>150k` <dbl>,
## #   `Don't know/refused` <dbl>
# Reshape the dataset from wide to long format
relig_income_long <- relig_income %>%
  pivot_longer(cols = -religion, names_to = "Income_Range", values_to = "Count")

# Check the structure of the data
str(relig_income_long)
## tibble [180 × 3] (S3: tbl_df/tbl/data.frame)
##  $ religion    : chr [1:180] "Agnostic" "Agnostic" "Agnostic" "Agnostic" ...
##  $ Income_Range: chr [1:180] "<$10k" "$10-20k" "$20-30k" "$30-40k" ...
##  $ Count       : num [1:180] 27 34 60 81 76 137 122 109 84 96 ...
#visualization on relig_income dataset of tidyr package

# Bar plot
ggplot(relig_income_long, aes(x = Income_Range, y = Count, fill = religion)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Income Distribution by Religion",
       x = "Income Range", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Heat map
ggplot(relig_income_long, aes(x = Income_Range, y = religion, fill = Count)) +
  geom_tile() +
  labs(title = "Heatmap of Income Distribution by Religion",
       x = "Income Range", y = "Religion") +
  theme_minimal() +
  scale_fill_gradient(low = "white", high = "orange")

#interactive plots with plotly
p <- ggplot(relig_income, aes(x = `<$10k`, y = `$75-100k`, color = religion)) +
  geom_point(size=3) +
  labs(title = "Religion income dataset: <$10k vs $75-100k")
ggplotly(p)
# Bubble Chart
ggplot(relig_income_long, aes(x = Income_Range, y = religion, size = Count, color = religion)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(3, 20)) +
  labs(title = "Income Distribution by Religious Affiliation",
       x = "Income Level",
       y = "Religion",
       size = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Check for missing values
sum(is.na(relig_income))
## [1] 0
# Creating derived features
relig_income_enhanced <- relig_income %>%
  mutate(
    Total = rowSums(select(., -religion)),
    Proportion_Low_Income = (`<$10k` + `$10-20k`) / Total,
    Proportion_High_Income = (`$50-75k` + `$75-100k`) / Total
  )

# Standardized measurements
relig_income_standardized <- relig_income %>%
  mutate(across(where(is.numeric), scale))

# Binning continuous values
relig_income_binned <- relig_income %>%
  mutate(
    Total = rowSums(select(., -religion)),
    Total_Bin = cut(Total, breaks = 5, labels = c("Very Low", "Low", "Medium", "High", "Very High")),
    Proportion_Low_Income = (`<$10k` + `$10-20k`) / Total,
    Proportion_Low_Income_Bin = cut(Proportion_Low_Income, breaks = 5, 
                                    labels = c("Very Low", "Low", "Medium", "High", "Very High"))
  )

# Display the first few rows of each dataset
print("Enhanced Dataset:")
## [1] "Enhanced Dataset:"
print(head(relig_income_enhanced))
## # A tibble: 6 × 14
##   religion  `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
##   <chr>       <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>
## 1 Agnostic       27        34        60        81        76       137        122
## 2 Atheist        12        27        37        52        35        70         73
## 3 Buddhist       27        21        30        34        33        58         62
## 4 Catholic      418       617       732       670       638      1116        949
## 5 Don’t kn…      15        14        15        11        10        35         21
## 6 Evangeli…     575       869      1064       982       881      1486        949
## # ℹ 6 more variables: `$100-150k` <dbl>, `>150k` <dbl>,
## #   `Don't know/refused` <dbl>, Total <dbl>, Proportion_Low_Income <dbl>,
## #   Proportion_High_Income <dbl>
print("Standardized Dataset:")
## [1] "Standardized Dataset:"
print(head(relig_income_standardized))
## # A tibble: 6 × 11
##   religion   `<$10k`[,1] `$10-20k`[,1] `$20-30k`[,1] `$30-40k`[,1] `$40-50k`[,1]
##   <chr>            <dbl>         <dbl>         <dbl>         <dbl>         <dbl>
## 1 Agnostic        -0.475        -0.472        -0.408        -0.351        -0.352
## 2 Atheist         -0.564        -0.500        -0.482        -0.451        -0.503
## 3 Buddhist        -0.475        -0.523        -0.505        -0.513        -0.510
## 4 Catholic         1.84          1.81          1.76          1.67          1.72 
## 5 Don’t kno…      -0.546        -0.551        -0.553        -0.592        -0.595
## 6 Evangelic…       2.77          2.80          2.83          2.74          2.62 
## # ℹ 5 more variables: `$50-75k` <dbl[,1]>, `$75-100k` <dbl[,1]>,
## #   `$100-150k` <dbl[,1]>, `>150k` <dbl[,1]>, `Don't know/refused` <dbl[,1]>
print("Binned Dataset:")
## [1] "Binned Dataset:"
print(head(relig_income_binned))
## # A tibble: 6 × 15
##   religion  `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
##   <chr>       <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>
## 1 Agnostic       27        34        60        81        76       137        122
## 2 Atheist        12        27        37        52        35        70         73
## 3 Buddhist       27        21        30        34        33        58         62
## 4 Catholic      418       617       732       670       638      1116        949
## 5 Don’t kn…      15        14        15        11        10        35         21
## 6 Evangeli…     575       869      1064       982       881      1486        949
## # ℹ 7 more variables: `$100-150k` <dbl>, `>150k` <dbl>,
## #   `Don't know/refused` <dbl>, Total <dbl>, Total_Bin <fct>,
## #   Proportion_Low_Income <dbl>, Proportion_Low_Income_Bin <fct>
# Now we make interactive visualization and dashboard using R shiny
library(shinydashboard)
## 
## Attaching package: 'shinydashboard'
## 
## The following object is masked from 'package:graphics':
## 
##     box
library(shiny)


ui <- dashboardPage(
  dashboardHeader(title = "Religion Income Explorer"),
  dashboardSidebar(
    selectInput("x_var", "X Variable:", 
                choices = unique(relig_income_long$Income_Range)),
    selectInput("y_var", "Y Variable:", 
                choices = unique(relig_income_long$Income_Range),
                selected = "$75-100k"),
    checkboxGroupInput("religions", "Select Religions:",
                       choices = unique(relig_income_long$religion),
                       selected = unique(relig_income_long$religion)[1:5])
  ),
  dashboardBody(
    fluidRow(
      box(plotlyOutput("scatter_plot"), width = 8),
      box(plotOutput("bar_plot"), width = 4)
    ),
    fluidRow(
      box(dataTableOutput("data_table"), width = 12)
    )
  )
)
## `shiny::dataTableOutput()` is deprecated as of shiny 1.8.1.
## Please use `DT::DTOutput()` instead.
## See <https://rstudio.github.io/DT/shiny.html> for more information.
server <- function(input, output) {
  
  filtered_data <- reactive({
    relig_income_long %>%
      filter(religion %in% input$religions)
  })
  
  output$scatter_plot <- renderPlotly({
    plot_data <- filtered_data() %>%
      pivot_wider(names_from = Income_Range, values_from = Count) %>%
      select(religion, !!sym(input$x_var), !!sym(input$y_var))
    
    p <- ggplot(plot_data, aes(x = !!sym(input$x_var), y = !!sym(input$y_var), color = religion, text = religion)) +
      geom_point(size = 3) +
      labs(title = paste(input$y_var, "vs", input$x_var),
           x = input$x_var, y = input$y_var) +
      theme_minimal()
    
    ggplotly(p, tooltip = "text")
  })
  
  output$bar_plot <- renderPlot({
    ggplot(filtered_data() %>% filter(Income_Range %in% c(input$x_var, input$y_var)),
           aes(x = religion, y = Count, fill = Income_Range)) +
      geom_bar(stat = "identity", position = "dodge") +
      labs(title = "Comparison of Selected Income Ranges",
           x = "Religion", y = "Count") +
      theme_minimal() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))
  })
  
  output$data_table <- renderDataTable({
    filtered_data() %>%
      pivot_wider(names_from = Income_Range, values_from = Count) %>%
      arrange(desc(`$75-100k`))
  })
}


shinyApp(ui, server)
Shiny applications not supported in static R Markdown documents